---
title: "Cleaning Cronos"
---

# Load

```{r}
# Attach project packages through the shared helper script
  source("helper-packages.R")

# Read the raw CRONOS SPSS (.sav) file; the path is relative to this document
  cronos_raw <- read_sav(file = "../raw-data/y-multi-cronos/CRONOS_ESS8_e01_2.sav")
```

# Clean 

```{r}
# Fieldwork window per country (one row per country, keyed on resp_country_common).
# UK: empirical min and max of the UK interview dates, used to account for a
# single missing value (!)
  cronos_dates <-
    tibble(
      resp_country_common = "UK",
      resp_interview_start_date = as.Date("06-09-2016", format = "%d-%m-%Y"),
      resp_interview_end_date = as.Date("06-03-2017", format = "%d-%m-%Y"))


# clean
# Builds the harmonised respondent-level table: every output column is prefixed
# "resp_" and only those columns are kept by the final select().
  clean_cronos <- 
    cronos_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "CROss-National Online Survey",
        
      # round number (character vector, title case)  
        resp_round = "Wave 1",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3URDxpy",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "internet",    

      # country (character vector; list of countries as written in original source)
      # any cntry code other than GB/EE/SI falls through to NA
        resp_country_original = 
          case_when(
            cntry == 'GB' ~ 'Great Britain',
            cntry == 'EE' ~ 'Estonia',
            cntry == 'SI' ~ 'Slovenia'),

      # country (character vector; standardised name from countrycode::countryname)
      # NOTE(review): the join below matches on this value; cronos_dates uses the
      # key "UK", which presumes countryname() returns "UK" for Great Britain
      # under its default destination -- confirm
        resp_country_common = 
          countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
        resp_interview_date = 
          make_date(year = inwyye, month = inwmme, day = inwdde)) %>% 
    # attach per-country fieldwork start/end dates; countries without a row in
    # cronos_dates get NA for both date columns
    left_join(
      cronos_dates, by = "resp_country_common") %>% 
    mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
      # codes not listed below (including NA) become NA
        resp_religion = 
          dplyr::recode(
            as.character(rlgdnm),
            "1" = "Christian",
            "2" = "Christian",
            "3" = "Christian",
            "4" = "Christian",
            "5" = "Jewish",
            "6" = "Muslim",
            "7" = "Other religion",
            "8" = "Other religion",
            .default = NA_character_),      

      # respondent's denomination (character vector; value labels of rlgdnm as given in the original source)
        resp_denomination = to_character(rlgdnm),  
    
      # respondent's age (character vector; bins denoted by single dash ["18-25"]) 
        resp_age = as.character(agea),
      
      # respondent's education level (original code mapped to a numbered label;
      # the bracketed suffix is the harmonised category)
      # NOTE(review): label strings are left untouched because downstream code may
      # match on them, but "620" contains the typo "IISCED", and "710"/"720" read
      # "ISCED 5B short" although they describe master-level degrees -- verify
      # against the ESS codebook before correcting
        resp_education_original =
          dplyr::recode(
            as.character(edulvlb),
            "0" = "1. Not completed ISCED level 1 [No education]",
            "113" = "2. ISCED 1, completed primary education [Primary]",
            "129" = "3. Vocational ISCED 2C < 2 years, no access ISCED 3 [Primary]",
            "212" = "4. General/pre-vocational ISCED 2A/2B, access ISCED 3 vocational [Primary]",
            "213" = "5. General ISCED 2A, access ISCED 3A general/all 3 [Primary]",
            "221" = "6. Vocational ISCED 2C >= 2 years, no access ISCED 3 [Primary]",
            "222" = "7. Vocational ISCED 2A/2B, access ISCED 3 vocational [Primary]",
            "223" = "8. Vocational ISCED 2, access ISCED 3 general/all [Primary]",
            "229" = "9. Vocational ISCED 3C < 2 years, no access ISCED 5 [Primary]",
            "311" = "10. General ISCED 3 >=2 years, no access ISCED 5 [Primary]",
            "312" = "11. General ISCED 3A/3B, access ISCED 5B/lower tier 5A [Primary]",
            "313" = "12. General ISCED 3A, access upper tier ISCED 5A/all 5 [Primary]",
            "321" = "13. Vocational ISCED 3C >= 2 years, no access ISCED 5 [Primary]",
            "322" = "14. Vocational ISCED 3A, access ISCED 5B/ lower tier 5A [Primary]",
            "323" = "15. Vocational ISCED 3A, access upper tier ISCED 5A/all 5 [Primary]",
            "412" = "16. General ISCED 4A/4B, access ISCED 5B/lower tier 5A [Primary]",
            "413" = "17. General ISCED 4A, access upper tier ISCED 5A/all 5 [Primary]",
            "421" = "18. ISCED 4 programmes without access ISCED 5 [Primary]",
            "422" = "19. Vocational ISCED 4A/4B, access ISCED 5B/lower tier 5A [Primary]",
            "423" = "20. Vocational ISCED 4A, access upper tier ISCED 5A/all [Primary]",
            "510" = "21. ISCED 5A short, intermediate/academic/general tertiary below bachelor [Primary]",
            "520" = "22. ISCED 5B short, advanced vocational qualifications [Primary]",
            "610" = "23. ISCED 5A medium, bachelor/equivalent from lower tier tertiary [College]",
            "620" = "24. IISCED 5A medium, bachelor/equivalent from upper/single tier tertiary [College]",
            "710" = "25. ISCED 5B short, master/equivalent from lower tier tertiary [College]",
            "720" = "26. ISCED 5B short, master/equivalent from upper/single tier tertiary [College]",
            "800" = "27. ISCED 6, doctoral degree [College]",
            .default = NA_character_),       
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
      # explicit mapping so that any code other than 1/2 (e.g. a "no answer"
      # code that was not converted to NA on import) is NA rather than 0
        resp_female = 
          case_when(
            gndr == 2 ~ 1,
            gndr == 1 ~ 0,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
        resp_rural = 
          case_when(
            domicil %in% c(1:3) ~ 0,
            domicil %in% c(4:5) ~ 1,
            TRUE ~ NA_real_),  
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: w1q12; QTEXT: How much do you trust people of another religion?; ROPTIONS: 1 = Trust completely [=0] + 2 = Trust somewhat [=0] + 3 = Do not trust very much [=1] + 4 = Do not trust at all [=1]; TARGET: Different religion; TYPE: Trust",
      
      # original response (as character vector); the negative codes are
      # non-substantive responses and are set to NA, all other values are kept
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(w1q12),
            "-33" = NA_character_,
            "-77" = NA_character_,
            "-88" = NA_character_,
            "-99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            w1q12 %in% c(1:2) ~ 0,
            w1q12 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above) [NB two questions coded in opposite directions]
        resp_gentrust_qinfo = "NUM: w2q9a/w2q9b; QTEXT: (a) Generally speaking, would you say that most people can be trusted? (b)  Generally speaking, would you say that you can’t be too careful in dealing with people or that most people can be trusted?; ROPTIONS: (a) 1 = Most people can be trusted + 2 = Can’t be too careful (b) 1 = Can’t be too careful + 2 = Most people can be trusted",
    
      # original response (as character vector); left empty because the item is
      # split across two differently-coded questions
        resp_gentrust_original = NA_character_,       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # version (a) is used when answered; version (b), which is coded in the
      # opposite direction, is used only when (a) is NA; anything else is NA
        resp_gentrust_bin_recode = 
          case_when(
            !is.na(w2q9a) & w2q9a == 1 ~ 0,
            !is.na(w2q9a) & w2q9a == 2 ~ 1,
            is.na(w2q9a) & w2q9b == 1 ~ 1,
            is.na(w2q9a) & w2q9b == 2 ~ 0),
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: w1q6; QTEXT: How important is religion in your life?;  ROPTIONS: 1 = Very important + 2 = Quite important + 3 = Not important + 4 = Not at all important",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
      # values that are already NA stay NA, so no explicit "NA" mapping is needed
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(w1q6),
            "-33" = NA_real_,
            "-77" = NA_real_,
            "-88" = NA_real_,
            "-99" = NA_real_),    

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # 1 (very important) -> 1; 4 (not at all important) -> 0
        resp_religiosity_recode = (4 - resp_religiosity_original)/3
    
    ) %>% 
    select(starts_with("resp_"))
```

# Save data

```{r}
  # Write the cleaned respondent-level table to disk as an RDS file
  saveRDS(
    object = clean_cronos,
    file = "../cleaned-data/y-25-multi-cronos.rds")
```